library(pacman)
p_load(
  here, data.table, fst, collapse, ggplot2
)


# Read the 2019 hourly plant-level OGE data (US units) for both releases
oge_path = here('Data/electricity-generation/open-grid-emissions')
# Balancing authorities to load; each one has a CSV named after its code
oge_bas = c('SOCO', 'BANC')

# Read every BA file of one OGE release, tag each row with the BA code it was
# read from, stack the files, and key the result by plant and hour.
#   release: name of the release folder under `oge_path` (e.g. 'v0.2.0')
# Returns a data.table keyed on (plant_id_eia, datetime_utc).
read_oge_release = function(release) {
  hourly_dir = file.path(oge_path, release, '2019_plant_data_hourly_us_units')
  ba_tables = lapply(oge_bas, function(ba_code) {
    fread(file.path(hourly_dir, paste0(ba_code, '.csv')))[, ba := ba_code]
  })
  # use.names = TRUE binds columns by name, as rbind() does for data.tables
  setkey(rbindlist(ba_tables, use.names = TRUE), plant_id_eia, datetime_utc)
}

# NOTE: the name `oge3_dt` matters downstream, since the join suffixes
# duplicated columns with the name of its second table.
oge2_dt = read_oge_release('v0.2.0')
oge3_dt = read_oge_release('v0.3.0')

# Full join of the two releases on plant and hour, so rows present in only one
# release are kept. Columns that exist in both tables keep their plain name for
# the v0.2.0 side and get the `_oge3_dt` suffix for the v0.3.0 side.
oge_compare_dt = oge2_dt |>
  join(oge3_dt, on = c('plant_id_eia', 'datetime_utc'), how = 'full')


# Scatter plots of v0.2.0 values (x-axis) against v0.3.0 values (y-axis).
# Each plot is drawn from its own random sample of the joined rows.

# Return at most `n` randomly chosen rows of `dt`. The cap at nrow(dt) avoids
# the error raised when asking for a sample larger than the table, and
# sample.int() also behaves on an empty table (unlike 1:nrow(dt)).
sample_rows = function(dt, n) {
  dt[sample.int(nrow(dt), min(nrow(dt), n))]
}

# Net generation, colored by the v0.3.0 BA, with a 45-degree reference line
ggplot(
  sample_rows(oge_compare_dt, 1e5),
  aes(x = net_generation_mwh, y = net_generation_mwh_oge3_dt, color = ba_oge3_dt)
) +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +
  geom_point(alpha = 0.1) +
  #geom_smooth() +
  theme_minimal()

# CO2
ggplot(
  sample_rows(oge_compare_dt, 1e4),
  aes(x = co2_mass_lb_for_electricity, y = co2_mass_lb_for_electricity_oge3_dt)
) +
  geom_point(alpha = 0.05) +
  theme_minimal()

# N2O
ggplot(
  sample_rows(oge_compare_dt, 1e4),
  aes(x = n2o_mass_lb_for_electricity, y = n2o_mass_lb_for_electricity_oge3_dt)
) +
  geom_point(alpha = 0.05) +
  theme_minimal()

# SO2
ggplot(
  sample_rows(oge_compare_dt, 1e4),
  aes(x = so2_mass_lb_for_electricity, y = so2_mass_lb_for_electricity_oge3_dt)
) +
  geom_point(alpha = 0.05) +
  theme_minimal()

# CH4
ggplot(
  sample_rows(oge_compare_dt, 1e4),
  aes(x = ch4_mass_lb_for_electricity, y = ch4_mass_lb_for_electricity_oge3_dt)
) +
  geom_point(alpha = 0.05) +
  theme_minimal()


# Flag, row by row, whether each pollutant value is exactly equal across the
# two releases. Map: name of the new flag column -> v0.2.0 column; the v0.3.0
# counterpart is the same name with the `_oge3_dt` suffix.
# NOTE(review): only CO2 compares the `_adjusted` columns; confirm that this
# difference from the other pollutants is intended.
v2_match_cols = c(
  co2_match = 'co2_mass_lb_for_electricity_adjusted',
  so2_match = 'so2_mass_lb_for_electricity',
  nox_match = 'nox_mass_lb_for_electricity',
  ch4_match = 'ch4_mass_lb_for_electricity',
  n2o_match = 'n2o_mass_lb_for_electricity'
)
# `==` yields NA wherever either side is NA (e.g. rows found in one release only)
oge_compare_dt[, (names(v2_match_cols)) := lapply(v2_match_cols, function(v2_col) {
  oge_compare_dt[[v2_col]] == oge_compare_dt[[paste0(v2_col, '_oge3_dt')]]
})]
# Average of each match flag by BA, using only rows that have a v0.2.0 `ba`.
# ~80-90% of data match
ba_match_inputs = get_vars(
  oge_compare_dt[!is.na(ba)],
  'ba$|match$',
  regex = TRUE
)
fmean(fgroup_by(ba_match_inputs, ba))

# What about at the plant level?
# Average of each match flag for every (ba, plant_id_eia) pair.
# NOTE(review): unlike the BA-level summary, rows with a missing `ba` are not
# dropped here; confirm that is intended.
oge_compare_dt |>
  get_vars('ba$|plant_id_eia|match$', regex = TRUE) |>
  gby(ba, plant_id_eia) |>
  fmean() |>
  # `nrows` is spelled out in full instead of relying on partial matching of `n`
  print(nrows = 1000)
